COVID-19 GLOBAL ANALYSIS

Visualization of data updated through 4 May 2020

Importing packages

In [1]:
import numpy as np
import pandas as pd

pd.set_option("display.max_columns",None) 
pd.set_option("display.max_rows",None) 

import warnings
warnings.filterwarnings("ignore")

import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "seaborn"
from plotly.subplots import make_subplots

Reading datasets

In [2]:
def _read_dated_csv(filename):
    """Read one CSV file and parse its `date` column into datetimes."""
    return pd.read_csv(filename, parse_dates=['date'])


# One table per metric (location-level rows), plus a per-country table that
# carries all three metrics side by side.
confirmed_df = _read_dated_csv("covid_19_confirmed_global.csv")
deaths_df = _read_dated_csv("covid_19_deaths_global.csv")
recovered_df = _read_dated_csv("covid_19_recovered_global.csv")
cw_df = _read_dated_csv("covid_19_country_global.csv")
In [3]:
# Report (rows, columns) for each loaded table, in the order they were read.
for table in (confirmed_df, deaths_df, recovered_df, cw_df):
    print(table.shape)
(27664, 6)
(27664, 6)
(26208, 6)
(19448, 5)
In [4]:
# Preview the first five rows of the confirmed-cases table.
confirmed_df.head()
Out[4]:
state country lat long date confirmed
0 NaN Afghanistan 33.0000 65.0000 2020-01-22 0
1 NaN Albania 41.1533 20.1683 2020-01-22 0
2 NaN Algeria 28.0339 1.6596 2020-01-22 0
3 NaN Andorra 42.5063 1.5218 2020-01-22 0
4 NaN Angola -11.2027 17.8739 2020-01-22 0
In [5]:
# Preview the first five rows of the deaths table.
deaths_df.head()
Out[5]:
state country lat long date deaths
0 NaN Afghanistan 33.0000 65.0000 2020-01-22 0
1 NaN Albania 41.1533 20.1683 2020-01-22 0
2 NaN Algeria 28.0339 1.6596 2020-01-22 0
3 NaN Andorra 42.5063 1.5218 2020-01-22 0
4 NaN Angola -11.2027 17.8739 2020-01-22 0
In [6]:
# Preview the first five rows of the recovered table.
recovered_df.head()
Out[6]:
state country lat long date recovered
0 NaN Afghanistan 33.0000 65.0000 2020-01-22 0
1 NaN Albania 41.1533 20.1683 2020-01-22 0
2 NaN Algeria 28.0339 1.6596 2020-01-22 0
3 NaN Andorra 42.5063 1.5218 2020-01-22 0
4 NaN Angola -11.2027 17.8739 2020-01-22 0
In [7]:
# Preview the first five rows of the per-country table (confirmed, deaths, recovered).
cw_df.head()
Out[7]:
date country confirmed deaths recovered
0 2020-01-22 Afghanistan 0 0 0
1 2020-01-22 Albania 0 0 0
2 2020-01-22 Algeria 0 0 0
3 2020-01-22 Andorra 0 0 0
4 2020-01-22 Angola 0 0 0
In [8]:
# Count rows per date and order the counts by date, so the first and last
# index entries are the earliest and latest dates in the per-country table.
a = cw_df['date'].value_counts().sort_index()
first_date, last_date = a.index[0], a.index[-1]
print('The first date is:', first_date)
print('The last date is:', last_date)
The first date is: 2020-01-22 00:00:00
The last date is: 2020-05-04 00:00:00

Analyzing COVID-19 Spread

In [9]:
# Animated density map of confirmed cases, one animation frame per date.
# Work on a copy: a plain `df1 = confirmed_df` is only an alias, so formatting
# `date` as text would also replace the datetime column of `confirmed_df`
# for every cell that runs afterwards.
df1 = confirmed_df.copy()
df1['date'] = pd.to_datetime(df1['date']).dt.strftime('%m/%d/%Y')
# Fill only the `state` hover text (missing on country-level rows). A blanket
# fillna('-') would also turn any missing lat/long/confirmed value into the
# string '-', changing those numeric columns to object dtype.
df1['state'] = df1['state'].fillna('-')
fig = px.density_mapbox(
    df1,
    lat='lat',
    lon='long',
    z='confirmed',
    radius=20,
    zoom=1,
    hover_data=["country", 'state', "confirmed"],
    mapbox_style="carto-positron",
    animation_frame='date',
    range_color=[0, 2000],
    title='COVID-19 Spread Analysis',
)
fig.update_layout(margin={"r": 0, "t": 30, "l": 0, "b": 0})
fig.show()

Active cases in each country

In [10]:
# Active cases are the confirmed cases left after removing deaths and recoveries.
cw_df['active'] = cw_df['confirmed'].sub(cw_df['deaths']).sub(cw_df['recovered'])
In [11]:
# Snapshot of the most recent date in the per-country table, totalled per country.
latest_date = cw_df['date'].max()
top = cw_df[cw_df['date'] == latest_date]
# Select the columns with a list: indexing a groupby with a bare tuple of
# names was deprecated in pandas 1.0 and raises in pandas 2.x.
world = top.groupby('country')[['confirmed', 'active', 'deaths']].sum().reset_index()
world.head()
Out[11]:
country confirmed active deaths
0 Afghanistan 2894 2407 90
1 Albania 803 229 31
2 Algeria 4648 2185 465
3 Andorra 750 206 45
4 Angola 35 22 2
In [12]:
# World choropleth shaded by active cases; the colour scale spans 1 to 20,000.
fig = px.choropleth(
    world,
    locations="country",
    locationmode='country names',
    color="active",
    color_continuous_scale="Peach",
    range_color=[1, 20000],
    hover_name="country",
    title='Active cases in each country',
)
fig.update_layout(height=640, width=1000)
fig.show()

Recovered count of each country

In [13]:
# Plot only the most recent date. `recovered_df` carries a `date` column with
# repeated locations, so passing the whole frame stacks one marker per date on
# the same coordinates and the hover value depends on which marker is hit.
recovered_latest = recovered_df[recovered_df['date'] == recovered_df['date'].max()]
fig = px.scatter_mapbox(
    recovered_latest,
    lat="lat",
    lon="long",
    hover_name="country",
    hover_data=["country", "recovered"],
    color_discrete_sequence=["mediumspringgreen"],
    zoom=1,
    height=480,
    title='Recovered count of each country',
)
# Blank base style with a USGS imagery raster layer drawn beneath the markers.
fig.update_layout(
    mapbox_style="white-bg",
    mapbox_layers=[
        {
            "below": 'traces',
            "sourcetype": "raster",
            "source": [
                "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}"
            ],
        }
    ],
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
)
fig.show()

Death count of each country

In [14]:
# Marker size is the fourth root of the death count, which compresses the
# spread between the smallest and largest values.
world['size'] = world['deaths'] ** 0.25
fig = px.scatter_geo(
    world,
    locations='country',
    locationmode='country names',
    size='size',
    color='deaths',
    color_continuous_scale='plotly3',
    hover_name='country',
    hover_data=['country', 'deaths'],
    projection='natural earth',
    title='Death count of each country',
)
fig.show()

Worldwide confirmed cases over time

In [15]:
# Worldwide confirmed total per date.
# Aggregate only `confirmed`: `date` is already the grouping key, and selecting
# columns with a bare tuple of names raises in pandas 2.x.
total_cases = confirmed_df.groupby('date')['confirmed'].sum().reset_index()
# `date` may be text (mm/dd/YYYY) if another cell has reformatted it. Text sorts
# lexicographically, so convert to datetimes first and then order by date.
total_cases['date'] = pd.to_datetime(total_cases['date'])
total_cases = total_cases.sort_values('date').reset_index(drop=True)

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=total_cases['date'],
        y=total_cases['confirmed'],
        mode='lines+markers',
        line=dict(color='orangered', width=2),
    )
)
fig.update_layout(
    title='Worldwide Confirmed Cases Over Time',
    xaxis_title='Date',
    yaxis_title='Total Cases',
)
fig.show()

Top 20 countries having most confirmed cases

In [16]:
# Keep only the latest date, then rank countries by confirmed cases.
is_latest = cw_df['date'] == cw_df['date'].max()
top = cw_df[is_latest]
confirmed_by_country = top.groupby(by='country')['confirmed'].sum()
top_casualities = confirmed_by_country.sort_values(ascending=False).head(20).reset_index()
top_casualities
Out[16]:
country confirmed
0 US 1180375
1 Spain 218011
2 Italy 211938
3 UK 191832
4 France 169583
5 Germany 166152
6 Russia 145268
7 Turkey 127659
8 Brazil 108620
9 Iran 98647
10 China 83966
11 Canada 61957
12 Belgium 50267
13 Peru 47372
14 India 46437
15 Netherlands 40968
16 Ecuador 31881
17 Switzerland 29981
18 Saudi Arabia 28656
19 Portugal 25524
In [17]:
# Horizontal bars, one per country; each country gets its own colour and the
# legend is hidden because the y-axis already names the countries.
fig = px.bar(
    top_casualities,
    x='confirmed',
    y='country',
    orientation='h',
    color='country',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.update_layout(
    showlegend=False,
    title='Top 20 countries having most confirmed cases',
    xaxis_title='Total cases',
    yaxis_title='Country',
)
fig.show()

Top 20 countries having most active cases

In [18]:
# Total active cases per country on the latest date, largest 20 first.
active_by_country = top.groupby(by='country')['active'].sum()
top_actives = active_by_country.sort_values(ascending=False).head(20).reset_index()
top_actives
Out[18]:
country active
0 US 924273
1 UK 162113
2 Russia 125817
3 Italy 99980
4 France 92903
5 Spain 71240
6 Turkey 56032
7 Brazil 55438
8 Netherlands 35732
9 India 32024
10 Canada 31924
11 Peru 31601
12 Belgium 29965
13 Ecuador 26879
14 Germany 26459
15 Saudi Arabia 23989
16 Portugal 22749
17 Singapore 17303
18 Sweden 15878
19 Pakistan 14830
In [19]:
# Horizontal bar chart of the 20 countries with the most active cases.
fig = px.bar(
    top_actives,
    x='active',
    y='country',
    orientation='h',
    color='country',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.update_layout(
    showlegend=False,
    title='Top 20 countries having most active cases',
    xaxis_title='Total cases',
    yaxis_title='Country',
)
fig.show()

Top 20 countries having most deaths

In [20]:
# Total deaths per country on the latest date, largest 20 first.
deaths_by_country = top.groupby(by='country')['deaths'].sum()
top_deaths = deaths_by_country.sort_values(ascending=False).head(20).reset_index()
top_deaths
Out[20]:
country deaths
0 US 68922
1 Italy 29079
2 UK 28809
3 Spain 25428
4 France 25204
5 Belgium 7924
6 Brazil 7367
7 Germany 6993
8 Iran 6277
9 Netherlands 5098
10 China 4637
11 Canada 4003
12 Turkey 3461
13 Sweden 2769
14 Mexico 2271
15 Switzerland 1784
16 Ecuador 1569
17 India 1566
18 Russia 1356
19 Peru 1344
In [21]:
# Horizontal bar chart of the 20 countries with the most deaths.
fig = px.bar(
    top_deaths,
    x='deaths',
    y='country',
    orientation='h',
    color='country',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
# The x-axis plots the `deaths` column, so label it as deaths rather than cases.
fig.update_layout(
    title='Top 20 countries having most deaths',
    xaxis_title='Total deaths',
    yaxis_title='Country',
    showlegend=False,
)
fig.show()

Top 20 countries having most recovered cases

In [22]:
# Total recoveries per country on the latest date, largest 20 first.
recovered_by_country = top.groupby(by='country')['recovered'].sum()
top_recovered = recovered_by_country.sort_values(ascending=False).head(20).reset_index()
top_recovered
Out[22]:
country recovered
0 US 187180
1 Germany 132700
2 Spain 121343
3 Italy 82879
4 Iran 79379
5 China 78792
6 Turkey 68166
7 France 51476
8 Brazil 45815
9 Canada 26030
10 Switzerland 25200
11 Russia 18095
12 Peru 14427
13 Mexico 13447
14 Ireland 13386
15 Austria 13316
16 India 12847
17 Belgium 12378
18 Chile 10415
19 Israel 10064
In [23]:
# Horizontal bar chart of the 20 countries with the most recovered cases.
fig = px.bar(
    top_recovered,
    x='recovered',
    y='country',
    orientation='h',
    color='country',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.update_layout(
    showlegend=False,
    title='Top 20 countries having most recovered cases',
    xaxis_title='Total cases',
    yaxis_title='Country',
)
fig.show()

Top 20 countries having highest mortality rate

In [24]:
# Per-country totals on the latest date, plus recovery and death percentages.
# Select the columns with a list: indexing a groupby with a bare tuple of
# names was deprecated in pandas 1.0 and raises in pandas 2.x.
rate = top.groupby(by='country')[['recovered', 'confirmed', 'deaths']].sum().reset_index()
# Each percentage is a share of that country's confirmed cases, rounded to 2 decimals.
rate['recovery_percentage'] = (rate['recovered'] / rate['confirmed'] * 100).round(2)
rate['death_percentage'] = (rate['deaths'] / rate['confirmed'] * 100).round(2)
rate.head()
Out[24]:
country recovered confirmed deaths recovery_percentage death_percentage
0 Afghanistan 397 2894 90 13.72 3.11
1 Albania 543 803 31 67.62 3.86
2 Algeria 1998 4648 465 42.99 10.00
3 Andorra 499 750 45 66.53 6.00
4 Angola 11 35 2 31.43 5.71
In [25]:
# Rank countries by death percentage and keep the 20 highest.
death_pct_by_country = rate.groupby(by='country')['death_percentage'].sum()
mortality = death_pct_by_country.sort_values(ascending=False).head(20).reset_index()
mortality
Out[25]:
country death_percentage
0 Nicaragua 33.33
1 MS Zaandam 22.22
2 Yemen 16.67
3 Belgium 15.76
4 UK 15.02
5 France 14.86
6 Italy 13.72
7 Bahamas 13.25
8 Sao Tome and Principe 13.04
9 Mauritania 12.50
10 Netherlands 12.44
11 Sweden 12.19
12 Antigua and Barbuda 12.00
13 Zimbabwe 11.76
14 Spain 11.66
15 Hungary 11.57
16 Belize 11.11
17 Haiti 11.00
18 Liberia 10.84
19 Suriname 10.00
In [26]:
# Horizontal bar chart of the 20 highest death percentages.
fig = px.bar(
    mortality,
    x='death_percentage',
    y='country',
    orientation='h',
    color='country',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.update_layout(
    showlegend=False,
    title='Top 20 countries having highest mortality rate',
    xaxis_title='Mortality rate in percentage',
    yaxis_title='Country',
)
fig.show()

Top 20 countries having highest recovery rate

In [27]:
# Rank countries by recovery percentage and keep the 20 highest.
recovery_pct_by_country = rate.groupby(by='country')['recovery_percentage'].sum()
recovery = recovery_pct_by_country.sort_values(ascending=False).head(20).reset_index()
recovery
Out[27]:
country recovery_percentage
0 Papua New Guinea 100.00
1 Cambodia 98.36
2 Iceland 95.78
3 Mauritius 95.18
4 Brunei 94.20
5 China 93.84
6 Thailand 91.73
7 Diamond Princess 90.59
8 Suriname 90.00
9 Luxembourg 88.95
10 New Zealand 87.62
11 Australia 85.98
12 South Korea 85.92
13 Trinidad and Tobago 85.34
14 Austria 85.24
15 Switzerland 84.05
16 Saint Lucia 83.33
17 Timor-Leste 83.33
18 Western Sahara 83.33
19 Malta 83.12
In [28]:
# Horizontal bar chart of the 20 highest recovery percentages.
fig = px.bar(
    recovery,
    x='recovery_percentage',
    y='country',
    orientation='h',
    color='country',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.update_layout(
    showlegend=False,
    title='Top 20 countries having highest recovery rate',
    xaxis_title='Recovery rate in percentage',
    yaxis_title='Country',
)
fig.show()